Addgrad

逐元素计算加法梯度

\[\begin{split}dx1 = \frac{\partial L}{\partial X1} = \frac{\partial L}{\partial Y} * 1 = \frac{\partial L}{\partial Y}\\ dx2 = \frac{\partial L}{\partial X2} = \frac{\partial L}{\partial Y} * 1 = \frac{\partial L}{\partial Y}\end{split}\]
输入:
  • dy - dy数据地址。

  • params - 参数按顺序打包成的long long型数组,各元素如下:
    • dx1_dim - x1的维度信息。

    • dx2_dim - x2的维度信息。

    • dy_dims - dy的维度信息。

    • num_dims - 维度数

    • temp_space - 临时空间

  • core_mask - 核掩码。

输出:
  • dx1 - dx1的数据地址。

  • dx2 - dx2的数据地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持fp32

  • MT7004 支持fp16, fp32

  • dx1_dim, dx2_dim, dy_dims, num_dims, temp_space按顺序打包成了一个long long型数组(维度信息与临时空间传入的是地址,num_dims传入的是数值)

共享存储版本:

void fp_add_grad_s(float *dy, float *dx1, float *dx2, long long *params, int core_mask)
void hp_add_grad_s(half *dy, half *dx1, half *dx2, long long *params, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addgrad.h>
 4
 5int main(int argc, char* argv[]) {  // Example driver: fills dy, packs params, then calls fp_add_grad_s once.
 6    float *dy = (float *)0x81000000;  // input gradient buffer at a hard-coded address
 7    float *dx1 = (float *)0x82000000;  // output buffer for dx1 at a hard-coded address
 8    float *dx2 = (float *)0x83000000;  // output buffer for dx2 at a hard-coded address
 9    int *tempspace = (int *)0x84000000;  // scratch buffer handed to the kernel through params[4]
10    int core_mask = 0b1111;  // low four bits set; presumably one bit per core -- confirm against platform docs
11
12    int i;
13
14    // dx2_dims differs from dx1_dims/dy_dims in dim 0 (1 vs 4); the shapes are NOT identical
15    int dx1_dims[] = {4, 8, 32};  // 4*8*32 = 1024 elements
16    int dx2_dims[] = {1, 8, 32};  // 1*8*32 = 256 elements
17    int dy_dims[] = {4, 8, 32};  // 4*8*32 = 1024 elements
18    int num_dims = 3;  // number of entries in each dims array above
19
20    int dx1_num = get_total_elements(num_dims, dx1_dims);  // not used later in this example
21    int dx2_num = get_total_elements(num_dims, dx2_dims);  // not used later in this example
22    int dy_num  = get_total_elements(num_dims, dy_dims);  // get_total_elements is not defined in this listing; presumably the product of the dims -- confirm
23
24    for (i = 0; i < dy_num; ++i) {  // fill dy with pseudo-random test data
25        dy[i] = (float)(rand() % 100) / 10.0f;  // values in [0.0, 9.9]; NOTE(review): rand() is declared in <stdlib.h>, which this listing does not include directly
26    }
27
28    long long params[6];  // six slots allocated; only params[0..4] are assigned below
29    params[0] = (unsigned long long)dx1_dims;  // address of the dx1 dims array
30    params[1] = (unsigned long long)dx2_dims;  // address of the dx2 dims array
31    params[2] = (unsigned long long)dy_dims;  // address of the dy dims array
32    params[3] = (unsigned long long)num_dims;  // dimension count, stored by value
33    params[4] = (unsigned long long)tempspace;  // address of the scratch buffer
34
35    fp_add_grad_s(dy, dx1, dx2, params, core_mask);  // fp32 shared-storage entry point
36    return 0;
37}

私有存储版本:

void fp_add_grad_p(float *dy, float *dx1, float *dx2, long long *params)
void hp_add_grad_p(half *dy, half *dx1, half *dx2, long long *params)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addgrad.h>
 4
 5int main(int argc, char* argv[]) {  // Example driver: fills dy, packs params, then calls fp_add_grad_p once.
 6    float *dy = (float *)0x10010000;  // input gradient buffer at a hard-coded address
 7    float *dx1 = (float *)0x10020000;  // output buffer for dx1 at a hard-coded address
 8    float *dx2 = (float *)0x10030000;  // output buffer for dx2 at a hard-coded address
 9    int *tempspace = (int *)0x10040000;  // scratch buffer handed to the kernel through params[4]
10
11    int i;
12
13    // dx2_dims differs from dx1_dims/dy_dims in dim 0 (1 vs 4); the shapes are NOT identical
14    int dx1_dims[] = {4, 8, 32};  // 4*8*32 = 1024 elements
15    int dx2_dims[] = {1, 8, 32};  // 1*8*32 = 256 elements
16    int dy_dims[] = {4, 8, 32};  // 4*8*32 = 1024 elements
17    int num_dims = 3;  // number of entries in each dims array above
18
19    int dx1_num = get_total_elements(num_dims, dx1_dims);  // not used later in this example
20    int dx2_num = get_total_elements(num_dims, dx2_dims);  // not used later in this example
21    int dy_num  = get_total_elements(num_dims, dy_dims);  // get_total_elements is not defined in this listing; presumably the product of the dims -- confirm
22
23    for (i = 0; i < dy_num; ++i) {  // fill dy with pseudo-random test data
24        dy[i] = (float)(rand() % 100) / 10.0f;  // values in [0.0, 9.9]; NOTE(review): rand() is declared in <stdlib.h>, which this listing does not include directly
25    }
26
27    long long params[6];  // six slots allocated; only params[0..4] are assigned below
28    params[0] = (unsigned long long)dx1_dims;  // address of the dx1 dims array
29    params[1] = (unsigned long long)dx2_dims;  // address of the dx2 dims array
30    params[2] = (unsigned long long)dy_dims;  // address of the dy dims array
31    params[3] = (unsigned long long)num_dims;  // dimension count, stored by value
32    params[4] = (unsigned long long)tempspace;  // address of the scratch buffer
33
34    fp_add_grad_p(dy, dx1, dx2, params);  // fp32 private-storage entry point; takes no core_mask
35    return 0;
36}